library(Seurat)
library(canceRbits)
library(dplyr)
library(patchwork)
library(DT)
library(SCpubr)
library(tibble)
library(dittoSeq)
library(scRepertoire)
library(reshape2)
library(viridis)
library(tidyr)
library(grDevices)
library(ggpubr)
library(ggplot2)
library(ggnewscale)
library(RColorBrewer)
library(scales)
library(enrichplot)
library(clusterProfiler)
library("org.Hs.eg.db")
library(DOSE)
library(msigdbr)
library(stringr)# Load seurat object containing single cell pre-processed and annotated data, in the metadata folder
srat <- readRDS(params$path_to_data)
meta <- srat@meta.data
meta$WHO <- "SD"
meta$WHO[meta$patient %in% c("NeoBCC007_post", "NeoBCC008_post", "NeoBCC012_post", "NeoBCC017_post")] <- "CR"
meta$WHO[meta$patient %in% c("NeoBCC004_post", "NeoBCC006_post", "NeoBCC010_post", "NeoBCC011_post")] <- "PR"
srat <- AddMetaData(srat, meta$WHO, col.name = "WHO")
srat$WHO <- factor(srat$WHO, levels = c("CR", "PR", "SD"))colors_B <- c(
"34" = "#ae017e",
"7" = "#377eb8",
"21" = "#f781bf",
"38" = "#fed976",
"15" = "#a6cee3" ,
"22" = "#e31a1c" ,
"3" = "#9e9ac8",
"4" = "#fdb462" ,
"24" = "#b3de69"
)
colors_clonotype = c("Small (1e-04 < X <= 0.001)" = "#8c6bb1",
"Medium (0.001 < X <= 0.01)" = "#41b6c4",
"Large (0.01 < X <= 0.1)" = "#fec44f",
"Hyperexpanded (0.1 < X <= 1)" = "#ce1256"
)
colors_Ig = c("IGHA1" = "#addd8e",
"IGHA2" = "#238443",
"IGHM" = "#dd3497" ,
"IGHD" = "#41b6c4",
"IGHG1" = "#fec44f",
"IGHG2" = "#fe9929",
"IGHG3" = "#ec7014",
"IGHG4" = "#fee391"
)s <- subset(srat, subset = anno_l1 %in% c("B cells","Plasma cells"))
s@meta.data$seurat_clusters <- factor(s@meta.data$seurat_clusters, levels=c("34","7", "21", "38",
"15", "22","3", "4", "24"))
p <- SCpubr::do_DimPlot(sample = s,
reduction = "umap",
label = TRUE,
repel = TRUE,
label.color = "black",
legend.position = "none",
group.by = "seurat_clusters",
font.size = 25,
pt.size = 0.5,
colors.use = colors_B) + theme_minimal() + NoLegend() + theme(text = element_text(size=20))
pIdents(s) <- s$seurat_clusters
genes <- list( "B" = c("MS4A1", "CD19"),
"Ag presentation" = c("HLA-DRA", "HLA-DRB1", "HLA-DPB1", "CD40"),
"Cytotox" = c("GZMK", "PRF1"),
"Mem" = "CD27",
"Plasma" = c("IGKC", "IGHG1", "CD38", "SDC1", "JCHAIN"),
"Cell state" = c("G2M.Score", "S.Score","percent.mito")
)
p <- SCpubr::do_DotPlot(sample = s,
features = genes,
font.size = 18,
legend.length = 12,
legend.type = "colorbar",
dot.scale = 8,
sequential.palette ="PiYG",
scale = TRUE,
sequential.direction = -1
)
pq1 <- SCpubr::do_BarPlot( s,
group.by = "seurat_clusters",
split.by = "WHO",
position = "fill",
flip = TRUE,
order=FALSE,
legend.position = "none",
font.size = 32 ,
colors.use = colors_B,
xlab = "",
ylab = ""
) + xlab("Response") + ylab("% of B and plasma cells")
q1DT::datatable(q1$data,
caption = ("Figure 5c"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))q1 <- SCpubr::do_BarPlot( s,
group.by = "seurat_clusters",
split.by = "Pathological.Response",
position = "fill",
flip = TRUE,
order=FALSE,
legend.position = "none",
font.size = 32 ,
colors.use = colors_B,
xlab = "",
ylab = ""
) + xlab("Response") + ylab("% of B and plasma cells")
q1This is the code used to create the combined expression
object from the BCR-Seq data. Due to personal confidentiality, the
object cannot be provided to GEO, but raw data are available on request
on EGA.
As default, the chunk related to BCR-Seq are not run.
# Run from raw BCR data stored in path_to_data or load the combined BCR data in the following chunk
# loading the data
vdj <- list()
dirs <- list.dirs(path = path_to_data, full.names = TRUE, recursive = FALSE)
samples <- c("NeoBCC007_post",
"NeoBCC008_post",
"NeoBCC012_post",
"NeoBCC017_post",
"NeoBCC004_post",
"NeoBCC005_post",
"NeoBCC006_post",
"NeoBCC010_post",
"NeoBCC011_post",
"NeoBCC014_post",
"NeoBCC015_post",
"NeoBCC018_post")
for (i in (samples)){
dir_TCR <- paste0(path_to_data, i, "_BCR_VDJ")
vdj[[i]] <- read.csv(paste0(dir_TCR, "/filtered_contig_annotations.csv"))
}
head(vdj[[1]])
# combine the contigs
combined <- combineBCR(vdj,
samples = gsub("_pre|_post", "", samples),
removeNA = TRUE, removeMulti = TRUE)
merge_combined <- bind_rows(combined, .id = "sample")if(params$accessibility == "unlock"){
combined <- readRDS(file.path(params$path_to_BCR))
srat <- RenameCells(srat,new.names = paste0(srat$patient, "_",gsub("(_1|_2|_3|_4|_5|_6|_7|_8|_9)*", "",colnames(srat))))
srat <- RenameCells(srat,new.names = gsub("_post", "",colnames(srat)))
srat$sample <- gsub("_post", "",srat$patient)
seurat <- combineExpression(combined, srat,
cloneCall = "strict",
group.by = "sample", chain = "both",
proportion = TRUE,
filterNA = TRUE)
s <- subset(seurat, subset = anno_l1 %in% c("B cells","Plasma cells") )
s@meta.data$cloneType <- factor(s@meta.data$cloneType, levels = unique(s$cloneType))
} else{
print("Please request access to the BCR-Seq data.")
}Done on the highly confident B and plasma cells with exactly 2 IGH and IGL chains
if(params$accessibility == "unlock"){
df <- s@meta.data
df <- df %>%
separate( CTgene, c("IGH", "IGL"), sep = "_") %>%
separate( IGH, c("V", "D", "J", "C"), "\\.")
s <- AddMetaData(s, df$C, col.name = "Ig_level")
sss <- subset(s, subset = Ig_level != "NA")
dim(sss)
sss@meta.data$cloneType <- factor(sss@meta.data$cloneType, levels = c("Hyperexpanded (0.1 < X <= 1)", "Large (0.01 < X <= 0.1)", "Medium (0.001 < X <= 0.01)", "Small (1e-04 < X <= 0.001)"))
# define cell group membership
Idents(sss) <- sss$cloneType
sss@meta.data$Ig_level <- factor(sss@meta.data$Ig_level, levels = c("IGHA1",
"IGHA2",
"IGHM",
"IGHD",
"IGHG1",
"IGHG2",
"IGHG3",
"IGHG4"))
q1 <- SCpubr::do_BarPlot( sss,
group.by = "Ig_level",
split.by = "anno_l1",
position = "fill",
flip = TRUE,
order=FALSE,
legend.position = "right",
font.size = 30 ,
colors.use = colors_Ig) +
xlab("") +
ylab("Frequency of Ig isotypes")
q1
DT::datatable(q1$data,
caption = ("Figure 5d"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
q2 <- SCpubr::do_BarPlot( sss,
group.by = "Ig_level",
split.by = "cloneType",
position = "fill",
flip = TRUE,
order=FALSE,
legend.position = "right",
font.size = 30 ,
colors.use = colors_Ig ) +
xlab("") +
ylab("Frequency of Ig isotypes")
q2
p <- q1|q2
DT::datatable(q2$data,
caption = ("Figure 5d"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
} else{
p <- "Please request access to the BCR-Seq data."
}if(params$accessibility == "unlock"){
Idents(s) <- s$seurat_clusters
s@meta.data$seurat_clusters <- factor(s@meta.data$seurat_clusters, levels=c("34","7", "21", "38",
"15", "22","3", "4", "24"))
s@meta.data$cloneType <- factor(s@meta.data$cloneType, levels = c("Hyperexpanded (0.1 < X <= 1)", "Large (0.01 < X <= 0.1)", "Medium (0.001 < X <= 0.01)", "Small (1e-04 < X <= 0.001)"))
# define cell group membership
Idents(s) <- s$cloneType
de_markers <- DElegate::FindAllMarkers2(s, replicate_column = "patient", method = "edger", min_fc = 0, min_rate = 0.1)
signature_h <- de_markers$feature[de_markers$group1=="Hyperexpanded (0.1 < X <= 1)" ]
background <- rownames(s)
# enricher for cnet plot
GO_H_gene_sets = msigdbr(species = "human", category = "H")
msigdbr_t2g = GO_H_gene_sets %>% dplyr::distinct(gs_name, gene_symbol) %>% as.data.frame()
ego_h <- enricher(gene = signature_h, universe = background, TERM2GENE = msigdbr_t2g)
b <- barplot(ego_h, title = "Hallmarks´enrichment of hyperexpanded clones")
tmp <- b$data
tmp$ID <- str_trunc(tmp$ID, 100, "right")
tmp$ID <- factor(tmp$ID, levels = tmp$ID[order(tmp$Count)])
p <- ggplot(tmp, aes(Count, ID)) +
geom_bar(stat = "identity", color="black", fill = colors_clonotype["Hyperexpanded (0.1 < X <= 1)"]) +
theme_classic()+
geom_text(
aes(label = (paste( "padj=", format(p.adjust,scientific = TRUE, digits = 2)))),
color = "black",
size = 6,
hjust=1,
position = position_dodge(0.5) ) +
theme(text = element_text(size=18)) +
ggtitle("Hyperexpanded clones")
p
DT::datatable(p$data,
caption = ("Figure 5f"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
} else{
p <- "Please request access to the BCR-Seq data."
}## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Vienna
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] stringr_1.5.1 msigdbr_7.5.1 DOSE_3.26.2 org.Hs.eg.db_3.17.0
## [5] AnnotationDbi_1.62.2 IRanges_2.34.1 S4Vectors_0.38.2 Biobase_2.60.0
## [9] BiocGenerics_0.46.0 clusterProfiler_4.8.3 enrichplot_1.20.3 scales_1.3.0
## [13] RColorBrewer_1.1-3 ggnewscale_0.4.10 tidyr_1.3.1 scRepertoire_1.10.1
## [17] dittoSeq_1.12.2 canceRbits_0.1.6 ggpubr_0.6.0.999 ggplot2_3.5.1
## [21] viridis_0.6.5 viridisLite_0.4.2 reshape2_1.4.4 tibble_3.2.1
## [25] SCpubr_2.0.2 DT_0.32 patchwork_1.2.0 dplyr_1.1.4
## [29] Seurat_5.0.3 SeuratObject_5.0.1 sp_2.1-3
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.4 matrixStats_1.2.0 spatstat.sparse_3.0-3
## [4] bitops_1.0-7 HDO.db_0.99.1 httr_1.4.7
## [7] doParallel_1.0.17 tools_4.3.0 sctransform_0.4.1
## [10] backports_1.4.1 utf8_1.2.4 R6_2.5.1
## [13] vegan_2.6-4 lazyeval_0.2.2 uwot_0.1.16
## [16] mgcv_1.9-1 permute_0.9-7 withr_3.0.0
## [19] gridExtra_2.3 progressr_0.14.0 cli_3.6.2
## [22] spatstat.explore_3.2-7 fastDummies_1.7.3 scatterpie_0.2.1
## [25] isoband_0.2.7 labeling_0.4.3 sass_0.4.9
## [28] spatstat.data_3.0-4 ggridges_0.5.6 pbapply_1.7-2
## [31] yulab.utils_0.1.4 gson_0.1.0 stringdist_0.9.12
## [34] parallelly_1.37.1 limma_3.56.2 RSQLite_2.3.5
## [37] VGAM_1.1-10 rstudioapi_0.16.0 generics_0.1.3
## [40] gridGraphics_0.5-1 ica_1.0-3 spatstat.random_3.2-3
## [43] crosstalk_1.2.1 car_3.1-2 GO.db_3.17.0
## [46] Matrix_1.6-5 ggbeeswarm_0.7.2 fansi_1.0.6
## [49] abind_1.4-5 lifecycle_1.0.4 edgeR_3.42.4
## [52] yaml_2.3.8 carData_3.0-5 SummarizedExperiment_1.30.2
## [55] qvalue_2.32.0 Rtsne_0.17 blob_1.2.4
## [58] grid_4.3.0 promises_1.2.1 crayon_1.5.2
## [61] miniUI_0.1.1.1 lattice_0.22-6 cowplot_1.1.3
## [64] KEGGREST_1.40.1 pillar_1.9.0 knitr_1.45
## [67] fgsea_1.26.0 GenomicRanges_1.52.1 future.apply_1.11.1
## [70] codetools_0.2-19 fastmatch_1.1-4 leiden_0.4.3.1
## [73] glue_1.7.0 downloader_0.4 ggfun_0.1.5
## [76] data.table_1.15.2 treeio_1.24.3 vctrs_0.6.5
## [79] png_0.1-8 spam_2.10-0 gtable_0.3.5
## [82] assertthat_0.2.1 cachem_1.1.0 xfun_0.43
## [85] S4Arrays_1.0.6 mime_0.12 tidygraph_1.3.1
## [88] survival_3.5-8 DElegate_1.2.1 SingleCellExperiment_1.22.0
## [91] pheatmap_1.0.12 iterators_1.0.14 fitdistrplus_1.1-11
## [94] ROCR_1.0-11 nlme_3.1-164 ggtree_3.13.0.001
## [97] bit64_4.0.5 RcppAnnoy_0.0.22 evd_2.3-6.1
## [100] GenomeInfoDb_1.36.4 bslib_0.6.2 irlba_2.3.5.1
## [103] vipor_0.4.7 KernSmooth_2.23-22 DBI_1.2.2
## [106] colorspace_2.1-0 ggrastr_1.0.2 tidyselect_1.2.1
## [109] bit_4.0.5 compiler_4.3.0 SparseM_1.81
## [112] DelayedArray_0.26.7 plotly_4.10.4 shadowtext_0.1.3
## [115] lmtest_0.9-40 digest_0.6.35 goftest_1.2-3
## [118] spatstat.utils_3.0-4 rmarkdown_2.26 XVector_0.40.0
## [121] htmltools_0.5.8 pkgconfig_2.0.3 sparseMatrixStats_1.12.2
## [124] MatrixGenerics_1.12.3 highr_0.10 fastmap_1.2.0
## [127] rlang_1.1.4 htmlwidgets_1.6.4 shiny_1.8.1
## [130] farver_2.1.2 jquerylib_0.1.4 zoo_1.8-12
## [133] jsonlite_1.8.8 BiocParallel_1.34.2 GOSemSim_2.26.1
## [136] RCurl_1.98-1.14 magrittr_2.0.3 GenomeInfoDbData_1.2.10
## [139] ggplotify_0.1.2 dotCall64_1.1-1 munsell_0.5.1
## [142] Rcpp_1.0.12 evmix_2.12 babelgene_22.9
## [145] ape_5.8 reticulate_1.35.0 truncdist_1.0-2
## [148] stringi_1.8.4 ggalluvial_0.12.5 ggraph_2.2.1
## [151] zlibbioc_1.46.0 MASS_7.3-60.0.1 plyr_1.8.9
## [154] parallel_4.3.0 listenv_0.9.1 ggrepel_0.9.5
## [157] forcats_1.0.0 deldir_2.0-4 Biostrings_2.68.1
## [160] graphlayouts_1.1.1 splines_4.3.0 tensor_1.5
## [163] locfit_1.5-9.9 igraph_2.0.3 spatstat.geom_3.2-9
## [166] cubature_2.1.0 ggsignif_0.6.4 RcppHNSW_0.6.0
## [169] evaluate_0.23 foreach_1.5.2 tweenr_2.0.3
## [172] httpuv_1.6.15 RANN_2.6.1 purrr_1.0.2
## [175] polyclip_1.10-6 future_1.33.2 scattermore_1.2
## [178] ggforce_0.4.2 broom_1.0.5 xtable_1.8-4
## [181] tidytree_0.4.6 RSpectra_0.16-1 rstatix_0.7.2
## [184] later_1.3.2 gsl_2.1-8 aplot_0.2.3
## [187] beeswarm_0.4.0 memoise_2.0.1 cluster_2.1.6
## [190] powerTCR_1.20.0 globals_0.16.3